# Library
library(readr)
library(dplyr)
library(tidyr)

# Set the Working Directory
setwd("~/Desktop/PolicingTStops/BadApples/Replication/Data/STOPS_Extract")

#
# From Raw to a Data Set
#

# Stop Database
Stop = read_delim("Stop.txt",
                  "\t", escape_double = FALSE, col_names = FALSE, 
                  trim_ws = TRUE)

colnames(Stop) = c("StopID","AgencyDescription","StopDate",
                   "Purpose","Action","DriverArrest",
                   "PassengerArrest","EncounterForce","EngageForce",
                   "OfficerInjury","DriverInjury","PassengerInjury",
                   "OfficerId","StopLocation","StopCity")

Stop$AgencyType = ifelse(grepl("Police Department",Stop$AgencyDescription),"PD",
                         ifelse(grepl("Sheriff",Stop$AgencyDescription),"SO",
                                ifelse(grepl("NC Division",Stop$AgencyDescription)|
                                         grepl("NC State Bureau",Stop$AgencyDescription)|
                                         grepl("NC Alcohol",Stop$AgencyDescription)|
                                         grepl("NC State Fairgrounds",Stop$AgencyDescription)|
                                         grepl("NC State Highway",Stop$AgencyDescription)|
                                         grepl("NC State Parks",Stop$AgencyDescription)|
                                         grepl("NC Wildlife",Stop$AgencyDescription),
                                       "NC","Other")))
Stop$AgencyType = ifelse(grepl("Campus",Stop$AgencyType)|
                           grepl("University",Stop$AgencyType)|
                           grepl("School",Stop$AgencyType)|
                           grepl("UNC ",Stop$AgencyType)|
                           grepl("College",Stop$AgencyType)|
                           grepl("State",Stop$AgencyType),
                         "Other",Stop$AgencyType)

Stop$stop = 1

Stop = subset(Stop, Stop$AgencyType == "PD" | Stop$AgencyType == "SO")
Stop$year = apply(as.matrix(as.character(Stop$StopDate)),1,
                function(x){substr(x,1,4)})
Stop = subset(Stop, Stop$year>2013&Stop$year<2020)

save(Stop,file="Stop_MuniSherriff.RData")

# Person Database
PERSON = read_delim("PERSON.txt",
                    "\t", escape_double = FALSE, col_names = FALSE, 
                    trim_ws = TRUE)

colnames(PERSON) = c("PersonID","StopID","PersonType","Age",
                     "Gender","Ethnicity","Race")

table(PERSON$Ethnicity)

PERSON$race2 = ifelse(PERSON$Race == "W" & PERSON$Ethnicity!="H","White",
                      ifelse(PERSON$Race == "B" & PERSON$Ethnicity!="H","Black",
                             ifelse(PERSON$Ethnicity=="H","Latinx","Other")))

table(PERSON$PersonType)

PERSON = subset(PERSON, PERSON$PersonType=="D")

summary(PERSON)

save(PERSON,file="Person_DriverOnly.RData")

# Search Database
Search = read_delim("Search.txt",
                    "\t", escape_double = FALSE, col_names = FALSE, 
                    trim_ws = TRUE)

colnames(Search) = c("SearchID","StopID","PersonID",
                     "SearchType","VehicleSearch","DriverSearch",
                     "PassengerSearch","PropertySearch",
                     "VehicleSeized","PersonalPropertySeized",
                     "OtherPropertySeized")

# Contraband Database
Contraband = read_delim("Contraband.txt",
                        "\t", escape_double = FALSE, col_names = FALSE, 
                        trim_ws = TRUE)

colnames(Contraband) = c("ContrabandID","SearchID","PersonID",
                         "StopID","Ounces",'Pounds',"Pints",
                         "Gallons","Dosages","Grams","Kilos",
                         "Money","Weapons","DollarAmt")

#
# Merging Together the Data Sets & Creating the Collapsed File
#

nc = merge(Stop, PERSON, by = "StopID")
dim(nc)

nc = merge(nc,Search, by = c("StopID","PersonID"),all.x=T)
dim(nc)

nc$any_search = ifelse(is.na(nc$SearchID),1,0)
nc$searchoccur = ifelse(nc$any_search == 0, 0, 
                        ifelse(nc$SearchType==1|nc$SearchType==3,1,NA))

nc = merge(nc,Contraband, by = c("StopID","PersonID","SearchID"),all.x=T)
dim(nc)

nc$contraoccur = ifelse(is.na(nc$ContrabandID),1,0)

save(nc,file="~/Desktop/PolicingTStops/BadApples/Data/NC_Cleaned2.RData")

#
# Aggregating by Officer
#

# Cleaning and Subsetting the Data Further
nc = subset(nc, nc$Purpose != 10)

nc$searchoccur = ifelse(is.na(nc$SearchID),0,
                        ifelse(nc$SearchType=="1"|
                                 nc$SearchType=="3",1,NA))

nc$contra2 = ifelse(!is.na(nc$ContrabandID) &  
                      nc$searchoccur == 1, 1, 0)
nc$stop = ifelse(is.na(nc$searchoccur),NA,1)

nc$unique.officer = paste(nc$AgencyDescription,nc$OfficerId,sep="::")

# Officer Collapse
nc.officers = aggregate(nc[,c("stop","searchoccur","contra2")],
                        by=list(nc$unique.officer,nc$race2),
                        function(x){sum(x,na.rm=T)})

# Saving the Collapse
save(nc.officers, file="~/Desktop/PolicingTStops/BadApples/Replication/Data/NCOfficers.RData")

# Aggregate Summary Information 
nc = subset(nc, nc$race2=="Black"|nc$race2=="White")

dim(nc)
table(nc$race2)
table(nc$searchoccur)
table(nc$race2,nc$searchoccur)

table(table(nc$AgencyDescription)>0)
table(table(nc$unique.officer)>0)

table(table(nc$AgencyDescription[nc$race2=="Black"])>0)
table(table(nc$unique.officer[nc$race2=="Black"])>0)

table(table(nc$AgencyDescription[nc$race2=="White"])>0)
table(table(nc$unique.officer[nc$race2=="White"])>0)

######################################################################
######################################################################
######################################################################

#
# Clearing and Setting up for the Measure
#

rm(list=ls())
library(readr)
library(dplyr)
library(tidyr)

setwd("~/Desktop/PolicingTStops/BadApples/Replication/")

load("Data/NCOfficers.RData")

# Identifying those that have made more than 50 stops of black drivers + white drivers AND at least one search  of each group as well.

nc.officer.thres = pivot_wider(data=nc.officers,
                               id_cols="Group.1",
                               names_from="Group.2",
                               values_from = c("stop","searchoccur","contra2"))
nc.keep = nc.officer.thres$Group.1[which(nc.officer.thres$stop_Black>49&
                                           nc.officer.thres$stop_White>49&
                                           nc.officer.thres$searchoccur_Black>0&
                                           nc.officer.thres$searchoccur_White>0)]
nc.officers$include = ifelse(as.character(nc.officers$Group.1)%in%nc.keep,
                             1,0)
table(nc.officers$include)
nc.officers.sm = subset(nc.officers, nc.officers$include==1)
nc.officers.sm = subset(nc.officers.sm, 
                        nc.officers.sm$Group.2=="Black"|
                          nc.officers.sm$Group.2=="White")
nc.officers.sm$Department = apply(as.matrix(as.character(nc.officers.sm$Group.1)),1,
                                  function(x){strsplit(x,"::",fixed=T)[[1]][1]})
colnames(nc.officers.sm) = c("officer","driver_rg","stops","searches","contraband",
                             "include","Department")

# Officer set up for running Stan model. 
nc.officer.stan = list(N = nrow(nc.officers.sm),
                       R = length(unique(nc.officers.sm$driver_rg)),
                       D = length(unique(nc.officers.sm$officer)),
                       K = length(unique(nc.officers.sm$Department)),
                       r = as.integer(as.factor(nc.officers.sm$driver_rg)),
                       d = as.integer(as.factor(nc.officers.sm$officer)),
                       k = as.integer(as.factor(nc.officers.sm$Department)),
                       n = nc.officers.sm$stops,
                       s = nc.officers.sm$searches,
                       h = nc.officers.sm$contraband)

# The original specification is 5 chains, a 4000 warmup iteaations, 
# 12,000 iterations that are kept, and a delta of 0.95.
library(rstan)
rstan_options(auto_write = TRUE)

options(mc.cores = 1)
nc.model.multi = stan("Scripts/fasttt-master/stan_models/working_countrywide_mixture2.stan",
                      data = nc.officer.stan,
                      chains = 1,
                      cores = 1,
                      warmup = 5000,#Orig: 4000
                      iter = 25000,#Oirg: 12500
                      thin = 5,
                      seed = 98713,
                      control = list(adapt_delta = 0.99,#Orig: 0.95
                                     max_treedepth = 20))
save(nc.model.multi,file="Models/NC_FastTT_50_temp.RData")

# Traceplot for a random sample of thresholds. 
set.seed(987)
t_for_trace = sample(1:7414,8)
t_names_for_trace = paste("t_i[",t_for_trace,"]",sep="")

png("Figures/NC_Traceplot_50.png",1254,769)
traceplot(nc.model.multi,
          pars=t_names_for_trace,nrow=2)
dev.off()

png("Figures/NC_SampOffDensity_50.png",629,592)
plot(nc.model.multi, show_density=TRUE, 
     pars=t_names_for_trace, 
     ci_level=0.95, outer_level=0.999)
dev.off()

# Checking that it's only phi_d[1] and XXX that have missing values for R Hat. Checking that R Hat is less than 1.01 for all estimates. 
nc.summary = summary(nc.model.multi)$summary

nc.summary[which(is.na(nc.summary[,10])),]
table(nc.summary[,10]<1)
table(nc.summary[,10]<1.01)

# Cropping the model output to what is needed.
nc.summary.ti = nc.summary[2:7415,]
nc.output = data.frame("fasttt.mean" = nc.summary.ti[,1],
                       "fasttt.lower" = nc.summary.ti[,1]-
                         1.96*nc.summary.ti[,2],
                       "fasttt.upper" = nc.summary.ti[,1]-
                         1.96*nc.summary.ti[,2])

# Merging in that information. 
nc.officer.analysis = cbind(nc.officers.sm,nc.output)

# Transforming the data set from long to wide. 
nc.officer.sm.temp = nc.officer.analysis[,c(1:5,8:10)]
nc.officer.wide = tidyr::pivot_wider(data=nc.officer.sm.temp,
                                     id_cols="officer",
                                     names_from="driver_rg",
                                     values_from = c("stops","searches","contraband",
                                                     "fasttt.mean","fasttt.lower",
                                                     "fasttt.upper"))

# Calculating additional values of interest.
nc.officer.wide$search_rate_white = nc.officer.wide$searches_White/
  nc.officer.wide$stops_White
nc.officer.wide$search_rate_black = nc.officer.wide$searches_Black/
  nc.officer.wide$stops_Black
nc.officer.wide$hit_rate_white = ifelse(nc.officer.wide$searches_White>0&
                                          nc.officer.wide$searches_Black>0,
                                        nc.officer.wide$contraband_White/
                                          nc.officer.wide$searches_White,NA)
nc.officer.wide$hit_rate_black = ifelse(nc.officer.wide$searches_Black>0&
                                          nc.officer.wide$searches_White>0,
                                        nc.officer.wide$contraband_Black/
                                          nc.officer.wide$searches_Black,NA)
nc.officer.wide$dif.tt = nc.officer.wide$fasttt.mean_White-
  nc.officer.wide$fasttt.mean_Black
nc.officer.wide$dif.tt.cat = ifelse(nc.officer.wide$dif.tt>0,
                                    ifelse(nc.officer.wide$fasttt.lower_White>
                                             nc.officer.wide$fasttt.upper_Black,
                                           "3 Black Drivers Disp. Neg.",
                                           "2 Approx. Equal"),
                                    ifelse(nc.officer.wide$fasttt.lower_Black>
                                             nc.officer.wide$fasttt.upper_White,
                                           "1 White Drivers Disp. Neg.",
                                           "2 Approx. Equal"))
nc.officer.wide$dif.sr = nc.officer.wide$search_rate_white-
  nc.officer.wide$search_rate_black
nc.officer.wide$dif.sr.cat = ifelse(nc.officer.wide$dif.sr==0,
                                    "2 Apprrox. Equal",
                                    ifelse(nc.officer.wide$dif.sr>0,
                                           "1 White Drivers Disp. Neg.",
                                           "3 Black Drivers Disp. Neg."))
nc.officer.wide$dif.hr = nc.officer.wide$hit_rate_white-
  nc.officer.wide$hit_rate_black
nc.officer.wide$dif.hr.cat = ifelse(nc.officer.wide$dif.hr==0,
                                    "2 Approx. Equal",
                                    ifelse(nc.officer.wide$dif.hr<0,
                                           "1 White Drivers Disp. Neg.",
                                           "3 Black Drivers Disp. Neg."))
save(nc.officer.wide,file="Data/NC_WideOfficerData.RData")

fig1d = ggplot(nc.officer.wide,aes(fasttt.mean_White,fasttt.mean_Black)) + 
  geom_point(size=1.5) + theme_bw(base_size=15) +
  xlab(NULL) + ylab(NULL) +
  geom_abline(intercept = 0, slope = 1,color = "darkgray",size = 0.5) +
  geom_abline(intercept = 0, slope = 2,color = "darkgray",linetype="dashed",size = 0.5) +
  geom_abline(intercept = 0, slope = 0.5,color = "darkgray",linetype="dashed",size = 0.5) +
  xlim(0,5)+ylim(0,5)+
  theme(panel.grid.major = element_line(colour = NA),
        panel.grid.minor = element_line(colour=NA),
        legend.position = "none")
ggsave(plot=fig1d,filename = "Figures/Figure1d.eps")
png("Figures/NC_TT_Officers.png",547,441)
fig1d
dev.off()

fig1c = ggplot(nc.officer.wide,aes(search_rate_white,search_rate_black)) + 
  geom_point(size=1.5) + theme_bw(base_size=15) +
  xlab(NULL) + ylab(NULL) +
  geom_abline(intercept = 0, slope = 1,color = "darkgray",size = 0.5) +
  geom_abline(intercept = 0, slope = 2,color = "darkgray",linetype="dashed",size = 0.5) +
  geom_abline(intercept = 0, slope = 0.5,color = "darkgray",linetype="dashed",size = 0.5) +
  xlim(0,1)+ylim(0,1)+
  theme(panel.grid.major = element_line(colour = NA),
        panel.grid.minor = element_line(colour=NA),
        legend.position = "none")
ggsave(plot=fig1c,filename="Figures/Figure1c.eps")
png("Figures/NC_SR_Officers.png",547,441)
fig1c
dev.off()

dim(nc.officer.wide)
length(unique(apply(as.matrix(as.character(nc.officer.wide$officer)),
      1,function(x){strsplit(x,"::",fixed=T)[[1]][1]})))
sum(nc.officer.wide$stops_Black) + sum(nc.officer.wide$stops_White)
sum(nc.officer.wide$searches_Black) + sum(nc.officer.wide$searches_White)

prop.table(table(nc.officer.wide$dif.sr.cat))
prop.table(table(nc.officer.wide$dif.tt.cat))

srr.nc = nc.officer.wide$search_rate_black/nc.officer.wide$search_rate_white

prop.table(table(srr.nc>=2 | srr.nc <=0.5))

######################################################################
######################################################################
######################################################################

# Library
library(readr)
library(dplyr)
library(tidyr)

# Set the Working Directory
setwd("~/Desktop/PolicingTStops/BadApples/Replication/")

# Loading in the data
load("Data/NC_WideOfficerData.RData")
load("Data/NC_Cleaned.RData")

# Cleaning and Subsetting the Data Further
nc = subset(nc, nc$Purpose != 10)

nc$searchoccur = ifelse(is.na(nc$SearchID),0,
                        ifelse(nc$SearchType=="1"|
                                 nc$SearchType=="3",1,NA))

nc$contra2 = ifelse(!is.na(nc$ContrabandID) &  
                      nc$searchoccur == 1, 1, 0)
nc$stop = ifelse(is.na(nc$searchoccur),NA,1)

nc$unique.officer = paste(nc$AgencyDescription,nc$OfficerId,sep="::")

# Identify high disparity officers. 
nc.officer.wide$ratio = abs(nc.officer.wide$fasttt.mean_White/nc.officer.wide$fasttt.mean_Black)
summary(nc.officer.wide$ratio)
nc.officer.extreme.bw = boxplot(nc.officer.wide$ratio)
nc.officer.extreme.id = nc.officer.wide$officer[which(nc.officer.wide$ratio%in%
                                                        unique(nc.officer.extreme.bw$out))]
nc$exclude.officer = ifelse(nc$unique.officer %in% nc.officer.extreme.id, 1, 0)
table(nc$exclude.officer)

nc.agencies = unique(apply(as.matrix(as.character(nc.officer.wide$officer)),
                           1,
                           function(x){strsplit(x,"::",fixed=T)[[1]][1]}))
nc.agencies.extreme = unique(apply(as.matrix(as.character(nc.officer.wide$officer[which(nc.officer.wide$ratio%in%unique(nc.officer.extreme.bw$out))])),
                                   1,
                                   function(x){strsplit(x,"::",fixed=T)[[1]][1]}))

# Collapsing by Agency.
nc.dept = aggregate(nc[,c("stop","searchoccur","contra2")],
                    by=list(nc$AgencyDescription,
                            nc$race2),
                    sum,na.rm=T)
nc.dept.temp = pivot_wider(data=nc.dept,
                           id_cols="Group.1",
                           names_from="Group.2",
                           values_from = c("stop","searchoccur","contra2"))
nc.keep = nc.dept.temp$Group.1[which(nc.dept.temp$stop_Black>49&
                                       nc.dept.temp$stop_White>49&
                                       nc.dept.temp$searchoccur_Black>0&
                                       nc.dept.temp$searchoccur_White>0)]
nc.dept$include = ifelse(as.character(nc.dept$Group.1)%in%nc.keep,1,0)

save(nc.dept,file="Data/NC_Depts_50.RData")

# Collapsing by Agency w/out "extremes."
nc.dept = aggregate(nc[nc$exclude.officer==0,
                       c("stop","searchoccur","contra2")],
                    by=list(nc$AgencyDescription[nc$exclude.officer==0],
                            nc$race2[nc$exclude.officer==0]),
                    sum,na.rm=T)
nc.dept.temp = pivot_wider(data=nc.dept,
                               id_cols="Group.1",
                               names_from="Group.2",
                               values_from = c("stop","searchoccur","contra2"))
nc.keep = nc.dept.temp$Group.1[which(nc.dept.temp$stop_Black>49&
                                       nc.dept.temp$stop_White>49&
                                       nc.dept.temp$searchoccur_Black>0&
                                       nc.dept.temp$searchoccur_White>0)]
nc.dept$include = ifelse(as.character(nc.dept$Group.1)%in%nc.keep,
                             1,0)

save(nc.dept,file="Data/NC_Depts_50_NoOutliers.RData")

##
## Agency Models -- Full
##

rm(list=ls())

# Load in the collapsed data sets. 
load("Data/NC_Depts_50.RData")

# Setting up the officer stan list and cropping the data set further
nc.dept.sm = subset(nc.dept, nc.dept$include==1)

nc.dept.sm = subset(nc.dept.sm, nc.dept.sm$Group.2=="White"|
                      nc.dept.sm$Group.2=="Black")

colnames(nc.dept.sm) = c("agency","driver_rg","stops","searches","contraband",
                         "include")

save(nc.dept.sm, file="Data/NC_AgencySm_Data_50Thresh.RData")

# Agency set up for running Stan model. 
nc.agency.stan = list(N = nrow(nc.dept.sm),
                      R = length(unique(nc.dept.sm$driver_rg)),
                      D = length(unique(nc.dept.sm$agency)),
                      r = as.integer(as.factor(nc.dept.sm$driver_rg)),
                      d = as.integer(as.factor(nc.dept.sm$agency)),
                      n = nc.dept.sm$stops,
                      s = nc.dept.sm$searches,
                      h = nc.dept.sm$contraband)

# The original specification is 5 chains, a 4000 warmup iteaations, 
# 12,000 iterations that are kept, and a delta of 0.95.
library(rstan)
rstan_options(auto_write = TRUE)

options(mc.cores = 5)
nc.model.multi = stan("Scripts/fasttt-master/stan_models/model_mixture.stan",
                      data = nc.agency.stan,
                      chains = 5,
                      cores = 5,
                      warmup = 5000,#Orig: 4000
                      iter = 25000,#Oirg: 12500
                      thin = 5,
                      seed = 98713,
                      control = list(adapt_delta = 0.95,#Orig: 0.95
                                     max_treedepth = 20))

save(nc.model.multi,file="Models/NC_FastTT_Agency50Thresh.RData")

##
## Agency Models -- No Outliers
##

rm(list=ls())

# Load in the collapsed data sets. 
load("Data/NC_Depts_50_NoOutliers.RData")

# Setting up the officer stan list and cropping the data set further
nc.dept.sm = subset(nc.dept, nc.dept$include==1)

nc.dept.sm = subset(nc.dept.sm, nc.dept.sm$Group.2=="White"|
                      nc.dept.sm$Group.2=="Black")

colnames(nc.dept.sm) = c("agency","driver_rg","stops","searches","contraband",
                         "include")

save(nc.dept.sm, file="Data/NC_AgencySm_Data_50Thresh_NoOut.RData")

# Agency set up for running Stan model. 
nc.agency.stan = list(N = nrow(nc.dept.sm),
                      R = length(unique(nc.dept.sm$driver_rg)),
                      D = length(unique(nc.dept.sm$agency)),
                      r = as.integer(as.factor(nc.dept.sm$driver_rg)),
                      d = as.integer(as.factor(nc.dept.sm$agency)),
                      n = nc.dept.sm$stops,
                      s = nc.dept.sm$searches,
                      h = nc.dept.sm$contraband)

# The original specification is 5 chains, a 4000 warmup iteaations, 
# 12,000 iterations that are kept, and a delta of 0.95.
library(rstan)
rstan_options(auto_write = TRUE)

options(mc.cores = 5)
nc.model.multi = stan("Scripts/fasttt-master/stan_models/model_mixture.stan",
                      data = nc.agency.stan,
                      chains = 5,
                      cores = 5,
                      warmup = 5000,#Orig: 4000
                      iter = 25000,#Oirg: 12500
                      thin = 5,
                      seed = 98713,
                      control = list(adapt_delta = 0.9,#Orig: 0.95
                                     max_treedepth = 20))

save(nc.model.multi,file="Models/NC_FastTT_Agency50Thresh_NoOut.RData")

###
### Analysis
###

rm(list=ls())

#
# Building the baseline data set
#

# Load in the collapsed data sets. 
load("Data/NC_AgencySm_Data_50Thresh.RData")
load("Models/NC_FastTT_Agency50Thresh.RData")

# Traceplot for a random sample of thresholds. 
set.seed(987)
t_for_trace = sample(1:434,8)
t_names_for_trace = paste("t_i[",t_for_trace,"]",sep="")

png("Figures/NC_Traceplot_AgencyFull50.png",1254,769)
traceplot(nc.model.multi,
          pars=t_names_for_trace,nrow=2)
dev.off()

png("Figures/NC_SampOffDensity_AgencyFull50.png",629,592)
plot(nc.model.multi, show_density=TRUE, 
     pars=t_names_for_trace, 
     ci_level=0.95, outer_level=0.999)
dev.off()

# Extracting the information + final check.
nc.summary = summary(nc.model.multi)$summary

nc.summary[which(is.na(nc.summary[,10])),]
table(nc.summary[,10]<1)
table(nc.summary[,10]<1.01)

# Cropping the model output to what is needed.
nc.summary.ti = nc.summary[2:435,]
nc.output = data.frame("fasttt.mean" = nc.summary.ti[,1],
                       "fasttt.lower" = nc.summary.ti[,1]-
                         1.96*nc.summary.ti[,2],
                       "fasttt.upper" = nc.summary.ti[,1]-
                         1.96*nc.summary.ti[,2])

# Merging in that information. 
nc.agency.analysis = cbind(nc.dept.sm,nc.output)

# Transforming the data set from long to wide. 
nc.agency.sm.temp = nc.agency.analysis[,c(1:5,7:9)]
nc.agency.wide = tidyr::pivot_wider(data=nc.agency.sm.temp,
                                    id_cols="agency",
                                    names_from="driver_rg",
                                    values_from = c("stops","searches","contraband",
                                                    "fasttt.mean","fasttt.lower",
                                                    "fasttt.upper"))

# Calculating additional values of interest.
nc.agency.wide$search_rate_white = nc.agency.wide$searches_White/
  nc.agency.wide$stops_White
nc.agency.wide$search_rate_black = nc.agency.wide$searches_Black/
  nc.agency.wide$stops_Black
nc.agency.wide$hit_rate_white = ifelse(nc.agency.wide$searches_White>0&
                                         nc.agency.wide$searches_Black>0,
                                       nc.agency.wide$contraband_White/
                                         nc.agency.wide$searches_White,NA)
nc.agency.wide$hit_rate_black = ifelse(nc.agency.wide$searches_Black>0&
                                         nc.agency.wide$searches_White>0,
                                       nc.agency.wide$contraband_Black/
                                         nc.agency.wide$searches_Black,NA)
nc.agency.wide$dif.tt = nc.agency.wide$fasttt.mean_White-
  nc.agency.wide$fasttt.mean_Black
nc.agency.wide$dif.tt.cat = ifelse(nc.agency.wide$dif.tt>0,
                                   ifelse(nc.agency.wide$fasttt.lower_White>
                                            nc.agency.wide$fasttt.upper_Black,
                                          "3 Black Drivers Disp. Neg.",
                                          "2 Approx. Equal"),
                                   ifelse(nc.agency.wide$fasttt.lower_Black>
                                            nc.agency.wide$fasttt.upper_White,
                                          "1 White Drivers Disp. Neg.",
                                          "2 Approx. Equal"))
nc.agency.wide$dif.sr = nc.agency.wide$search_rate_white-
  nc.agency.wide$search_rate_black
nc.agency.wide$dif.sr.cat = ifelse(nc.agency.wide$dif.sr==0,
                                   "2 Apprrox. Equal",
                                   ifelse(nc.agency.wide$dif.sr>0,
                                          "1 White Drivers Disp. Neg.",
                                          "3 Black Drivers Disp. Neg."))
nc.agency.wide$dif.hr = nc.agency.wide$hit_rate_white-
  nc.agency.wide$hit_rate_black
nc.agency.wide$dif.hr.cat = ifelse(nc.agency.wide$dif.hr==0,
                                   "2 Approx. Equal",
                                   ifelse(nc.agency.wide$dif.hr<0,
                                          "1 White Drivers Disp. Neg.",
                                          "3 Black Drivers Disp. Neg."))
colnames(nc.agency.wide)[2:23] = paste(colnames(nc.agency.wide)[2:23],"full",sep="_")

save(nc.agency.wide,file="Data/NC_WideAgencyData_50Full.RData")

#
# Building the baseline data set
#

# Load in the collapsed data sets. 
load("Data/NC_AgencySm_Data_50Thresh_NoOut.RData")
load("Models/NC_FastTT_Agency50Thresh_NoOut.RData")

# Traceplot for a random sample of thresholds. 
set.seed(987)
t_for_trace = sample(1:434,8)
t_names_for_trace = paste("t_i[",t_for_trace,"]",sep="")

png("Figures/NC_Traceplot_AgencyNoOut50.png",1254,769)
traceplot(nc.model.multi,
          pars=t_names_for_trace,nrow=2)
dev.off()

png("Figures/NC_SampOffDensity_AgencyNoOut50.png",629,592)
plot(nc.model.multi, show_density=TRUE, 
     pars=t_names_for_trace, 
     ci_level=0.95, outer_level=0.999)
dev.off()

# Extracting the information + final check.
nc.summary = summary(nc.model.multi)$summary

nc.summary[which(is.na(nc.summary[,10])),]
table(nc.summary[,10]<1)
table(nc.summary[,10]<1.01)

# Cropping the model output to what is needed.
nc.summary.ti = nc.summary[2:435,]
nc.output = data.frame("fasttt.mean" = nc.summary.ti[,1],
                       "fasttt.lower" = nc.summary.ti[,1]-
                         1.96*nc.summary.ti[,2],
                       "fasttt.upper" = nc.summary.ti[,1]-
                         1.96*nc.summary.ti[,2])

# Merging in that information. 
nc.agency.analysis = cbind(nc.dept.sm,nc.output)

# Transforming the data set from long to wide. 
nc.agency.sm.temp = nc.agency.analysis[,c(1:5,7:9)]
nc.agency.wide = tidyr::pivot_wider(data=nc.agency.sm.temp,
                                    id_cols="agency",
                                    names_from="driver_rg",
                                    values_from = c("stops","searches","contraband",
                                                    "fasttt.mean","fasttt.lower",
                                                    "fasttt.upper"))

# Calculating additional values of interest.
nc.agency.wide$search_rate_white = nc.agency.wide$searches_White/
  nc.agency.wide$stops_White
nc.agency.wide$search_rate_black = nc.agency.wide$searches_Black/
  nc.agency.wide$stops_Black
nc.agency.wide$hit_rate_white = ifelse(nc.agency.wide$searches_White>0&
                                         nc.agency.wide$searches_Black>0,
                                       nc.agency.wide$contraband_White/
                                         nc.agency.wide$searches_White,NA)
nc.agency.wide$hit_rate_black = ifelse(nc.agency.wide$searches_Black>0&
                                         nc.agency.wide$searches_White>0,
                                       nc.agency.wide$contraband_Black/
                                         nc.agency.wide$searches_Black,NA)
nc.agency.wide$dif.tt = nc.agency.wide$fasttt.mean_White-
  nc.agency.wide$fasttt.mean_Black
nc.agency.wide$dif.tt.cat = ifelse(nc.agency.wide$dif.tt>0,
                                   ifelse(nc.agency.wide$fasttt.lower_White>
                                            nc.agency.wide$fasttt.upper_Black,
                                          "3 Black Drivers Disp. Neg.",
                                          "2 Approx. Equal"),
                                   ifelse(nc.agency.wide$fasttt.lower_Black>
                                            nc.agency.wide$fasttt.upper_White,
                                          "1 White Drivers Disp. Neg.",
                                          "2 Approx. Equal"))
nc.agency.wide$dif.sr = nc.agency.wide$search_rate_white-
  nc.agency.wide$search_rate_black
nc.agency.wide$dif.sr.cat = ifelse(nc.agency.wide$dif.sr==0,
                                   "2 Apprrox. Equal",
                                   ifelse(nc.agency.wide$dif.sr>0,
                                          "1 White Drivers Disp. Neg.",
                                          "3 Black Drivers Disp. Neg."))
nc.agency.wide$dif.hr = nc.agency.wide$hit_rate_white-
  nc.agency.wide$hit_rate_black
nc.agency.wide$dif.hr.cat = ifelse(nc.agency.wide$dif.hr==0,
                                   "2 Approx. Equal",
                                   ifelse(nc.agency.wide$dif.hr<0,
                                          "1 White Drivers Disp. Neg.",
                                          "3 Black Drivers Disp. Neg."))
colnames(nc.agency.wide)[2:23] = paste(colnames(nc.agency.wide)[2:23],"noout",sep="_")

save(nc.agency.wide,file="Data/NC_WideAgencyData_50NoOut.RData")

#
# Combo
#

rm(list=ls())

# The combined measures
load("Data/NC_WideAgencyData_50Full.RData")
nc.combo = nc.agency.wide
load("Data/NC_WideAgencyData_50NoOut.RData")
nc.combo = cbind(nc.combo,nc.agency.wide[,2:23])

dim(nc.combo)
length(unique(apply(as.matrix(as.character(nc.combo$agency)),1,function(x){strsplit(x,"::",fixed=T)[[1]][1]})))
sum(nc.combo$stops_Black_noout)+sum(nc.combo$stops_White_noout)
sum(nc.combo$searches_Black_noout)+sum(nc.combo$searches_White_noout)

# Identify high disparity officers. 
load("Data/NC_WideOfficerData.RData")
nc.officer.wide$ratio = abs(nc.officer.wide$fasttt.mean_White/nc.officer.wide$fasttt.mean_Black)
summary(nc.officer.wide$ratio)
nc.officer.extreme.bw = boxplot(nc.officer.wide$ratio)
nc.officer.extreme.id = nc.officer.wide$officer[which(nc.officer.wide$ratio%in%
                                                        unique(nc.officer.extreme.bw$out))]
length(nc.officer.extreme.id)
nc.agencies = unique(apply(as.matrix(as.character(nc.officer.wide$officer)),
                           1,
                           function(x){strsplit(x,"::",fixed=T)[[1]][1]}))
nc.agencies.extreme = unique(apply(as.matrix(as.character(nc.officer.wide$officer[which(nc.officer.wide$ratio%in%unique(nc.officer.extreme.bw$out))])),
                                   1,
                                   function(x){strsplit(x,"::",fixed=T)[[1]][1]}))

nc.combo$agency_any = ifelse(tolower(nc.combo$agency) %in% tolower(nc.agencies),1,0)
nc.combo$agency_out = ifelse(tolower(nc.combo$agency) %in% tolower(nc.agencies.extreme),1,0)

table(nc.combo$agency_any,nc.combo$agency_out)

ggplot(nc.combo %>% filter(agency_any==1),
       aes(fasttt.mean_White_full,fasttt.mean_Black_full,
           color=factor(agency_out),shape=factor(agency_out))) + 
  geom_point(size=1.5) + theme_bw(base_size=12) +
  xlab(NULL) + ylab(NULL) +
  geom_abline(intercept = 0, slope = 1,color = "black",size = 0.5) +
  geom_abline(intercept = 0, slope = 2,color = "darkgray",linetype="dashed",size = 0.5) +
  geom_abline(intercept = 0, slope = 0.5,color = "darkgray",linetype="dashed",size = 0.5) +
  xlim(0,5)+ylim(0,5)+
  theme(panel.grid.major = element_line(colour = NA),
        panel.grid.minor = element_line(colour=NA),
        legend.position = "none") +
  ggtitle("Inferred Thresholds")

nc.combo$fasttt.mean_Black_dif = nc.combo$fasttt.mean_Black_full - 
  nc.combo$fasttt.mean_Black_noout
nc.combo$fasttt.mean_White_dif = nc.combo$fasttt.mean_White_full - 
  nc.combo$fasttt.mean_White_noout
nc.combo$fasttt.black_dif_signif = ifelse(nc.combo$agency_out==1,
                                          ifelse(nc.combo$fasttt.mean_Black_dif<0,
                                                 ifelse(nc.combo$fasttt.mean_Black_full>
                                                          nc.combo$fasttt.lower_Black_noout & 
                                                          nc.combo$fasttt.mean_Black_noout<
                                                          nc.combo$fasttt.upper_White_full,1,0),
                                                 ifelse(nc.combo$fasttt.mean_Black_full<
                                                          nc.combo$fasttt.upper_Black_noout & 
                                                          nc.combo$fasttt.mean_Black_noout>
                                                          nc.combo$fasttt.lower_White_full,1,0)),
                                          NA)

nc.combo$thresh.black.signif = ifelse(abs(nc.combo$fasttt.mean_Black_dif) < 
                                        abs(nc.combo$fasttt.mean_Black_full-nc.combo$fasttt.upper_Black_full)&
                                        abs(nc.combo$fasttt.mean_Black_dif) < 
                                        abs(nc.combo$fasttt.mean_Black_noout-nc.combo$fasttt.upper_Black_noout),
                                      1,0)

nc.combo.sm = nc.combo %>% filter(agency_out==1)

mean(nc.combo.sm$fasttt.mean_White_full/nc.combo.sm$fasttt.mean_Black_full)
mean(nc.combo.sm$fasttt.mean_White_noout/nc.combo.sm$fasttt.mean_Black_noout)
t.test(nc.combo.sm$fasttt.mean_White_full/nc.combo.sm$fasttt.mean_Black_full,
       nc.combo.sm$fasttt.mean_White_noout/nc.combo.sm$fasttt.mean_Black_noout,paired = T)

mean(nc.combo.sm$fasttt.mean_Black_full)
mean(nc.combo.sm$fasttt.mean_Black_noout)
t.test(nc.combo.sm$fasttt.mean_Black_full,
       nc.combo.sm$fasttt.mean_Black_noout,paired = T)

mean(nc.combo.sm$fasttt.mean_White_full)
mean(nc.combo.sm$fasttt.mean_White_noout)
t.test(nc.combo.sm$fasttt.mean_White_full,
       nc.combo.sm$fasttt.mean_White_noout,paired = T)

t.test(nc.combo.sm$fasttt.mean_White_full-nc.combo.sm$fasttt.mean_White_noout,
       nc.combo.sm$fasttt.mean_Black_full-nc.combo.sm$fasttt.mean_Black_noout,
       paired = T)

sd(nc.combo.sm$fasttt.mean_White_full-nc.combo.sm$fasttt.mean_White_noout)
sd(nc.combo.sm$fasttt.mean_Black_full-nc.combo.sm$fasttt.mean_Black_noout)
sd(nc.combo.sm$fasttt.mean_White_noout/nc.combo.sm$fasttt.mean_Black_noout)


prop.table(table(nc.combo$dif.sr.cat_noout))
prop.table(table(nc.combo$dif.tt.cat_noout))

fig2d = ggplot(nc.combo,aes(fasttt.mean_White_noout,fasttt.mean_Black_noout)) + 
  geom_point(size=1.5) + theme_bw(base_size=15) +
  xlab(NULL) + ylab(NULL) +
  geom_abline(intercept = 0, slope = 1,color = "darkgray",size = 0.5) +
  geom_abline(intercept = 0, slope = 2,color = "darkgray",linetype="dashed",size = 0.5) +
  geom_abline(intercept = 0, slope = 0.5,color = "darkgray",linetype="dashed",size = 0.5) +
  xlim(0,4)+ylim(0,4)+
  theme(panel.grid.major = element_line(colour = NA),
        panel.grid.minor = element_line(colour=NA),
        legend.position = "none")
ggsave(plot=fig2d,filename = "Figures/Figure2d.eps")
png("Figures/NC_TT_NoOut.png",547,441)
fig2d
dev.off()

fig2c = ggplot(nc.combo,aes(search_rate_white_noout,search_rate_black_noout)) + 
  geom_point(size=1.5) + theme_bw(base_size=15) +
  xlab(NULL) + ylab(NULL) +
  geom_abline(intercept = 0, slope = 1,color = "darkgray",size = 0.5) +
  geom_abline(intercept = 0, slope = 2,color = "darkgray",linetype="dashed",size = 0.5) +
  geom_abline(intercept = 0, slope = 0.5,color = "darkgray",linetype="dashed",size = 0.5) +
  xlim(0,0.25)+ylim(0,0.25)+
  theme(panel.grid.major = element_line(colour = NA),
        panel.grid.minor = element_line(colour=NA),
        legend.position = "none")
ggsave(plot=fig2c,filename = "Figures/Figure2c.eps")
png("Figures/NC_SR_NoOut.png",547,441)
fig2c
dev.off()
